#Nature's Kidneys: the Role of Wetland Reserve Easements in Restoring Water Quality
#Nicole Karwowski and Marin Skidmore
#Replication File
#2025

#Summary table creation 

#packages
library(lubridate)
library(ggplot2)
library(tidyverse)
library(fixest)
library(plm)
library(collapse)
library(stargazer)
library(cobalt)
library(collapse)
library(xtable)
library(spatstat.utils)
library(tableone)

################################################################################
#Create summary table for all, ever-treated, and never-treated
#want standardized difference between ever-treated and never treated
#want mean and then sd in denominator
#put N on the bottom


#set wd
setwd("C:/Users/16308/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package")

#load main dataset
load("Data/main/main_huc12_panel.RData")

#keep only relevant years 
merge<-subset(merge, as.numeric(Year)>=1985)

#keep only observations that have an ammonia, TKN or phosphorous measurement
merge<-subset(merge, !is.na(ammonia_filtered) | !is.na(TP_filtered) | !is.na(TKN_filtered))


treat<-subset(merge, merge$EverTreated==1)
control<-subset(merge, merge$EverTreated==0)

treat1<-treat
control1<-control
merge1<-merge

#only want certain variables in the summary stats
merge<-subset(merge, select=c(ammonia_filtered, TP_filtered, TKN_filtered, 
                              #upstream_sf_ammonia_filtered, upstream_sf_TP_filtered, upstream_sf_TKN_filtered, 
                              RestoredCum, RestoredCumP, RestoredBinary, 
                              #UpstreamRestoredCumSf,UpstreamRestoredCumSfP, 
                              Ppt,Temp,areaacres,cropland,developed,grass_shrub,tree_cover,water,
                              tri_ammonia_water, tri_phosphorus_water,
                              #tri_ammonia, tri_ammonia_water, tri_nitrate, tri_nitrate_water, tri_phosphorus, tri_phosphorus_water,
                              AnimalUnits, EverTreated))

colnames(merge)<-c("ammonia", "phosphorous", "total kjedahl nitrogen", 
                   "restored wetlands (acres)", "restored wetlands (prop)", "restored wetland (0/1)",
                   "ppt","temp", "HUC12 acres", "agland", "developed", "grass/shrub", "treecover", "water",
                   "TRI ammonia", "TRI phosphorus",
                  "animal units", "EverTreated")

treat<-subset(merge, merge$EverTreated==1)
control<-subset(merge, merge$EverTreated==0)

#Create a mean and SD for each group of interest
options(scipen = 999)
all_mean<-as.data.frame(merge %>% fmean())
all_sd<-as.data.frame(merge %>% fsd())
all_var<-as.data.frame(merge %>% fvar())

treat_mean<-as.data.frame(treat %>% fmean())
treat_sd<-as.data.frame(treat %>% fsd())
treat_var<-as.data.frame(treat %>% fvar())

control_mean<-as.data.frame(control %>% fmean())
control_sd<-as.data.frame(control %>% fsd())
control_var<-as.data.frame(control %>% fvar())

#bind the information together
together<-cbind(all_mean, all_sd, all_var, treat_mean, treat_sd, treat_var, control_mean, control_sd, control_var)
colnames(together)<-c("All (mean)", "All (SD)", "All (var)", "Ever-treated (mean)", 
                      "Ever-treated (SD)", "Ever-treated (var)",
                      "Never-treated (mean)", "Never-treated (SD)", "Never-treated (var)")

#create normalized differences
# same as the formula from Imbens and Wooldridge 2007
together$sd_1_2<-(together$`All (mean)`-together$`Ever-treated (mean)`) / (sqrt((together$`All (var)`+together$`Ever-treated (var)`)))

together$sd_1_3<-(together$`All (mean)`-together$`Never-treated (mean)`) / (sqrt((together$`All (var)`+together$`Never-treated (var)`)))

together$sd_2_3<-(together$`Ever-treated (mean)`-together$`Never-treated (mean)`) / (sqrt((together$`Ever-treated (var)`+together$`Never-treated (var)`)))

together<-together[c(1:17),]

together <- together %>% mutate_if(is.numeric, round, digits=3)
together <- together %>% mutate_if(is.numeric, format, nsmall=3)

colnames(together)

#add p-values 
vars <- names(merge)[sapply(merge, is.numeric) & names(merge) != "EverTreated"]

# Create a vector to store p-values
p_values <- sapply(vars, function(v) {
  t.test(merge[[v]] ~ merge$EverTreated)$p.value
})
p_values <- round(p_values, 3)
p_values <- format(p_values, nsmall = 3)


# we can drop the variance variables
together<-subset(together, select=-c(3, 6, 9))

together <- together %>% rename(ND_1_2=sd_1_2, 
                                ND_1_3=sd_1_3,
                                ND_2_3=sd_2_3)

#add paran to standard deviations
together$`All (SD)`<-paste("(", together$`All (SD)`, ")", sep="")
together$`All (SD)`<-str_replace_all(together$`All (SD)`, " ", "")

together$`Ever-treated (SD)`<-paste("(", together$`Ever-treated (SD)`, ")", sep="")
together$`Ever-treated (SD)`<-str_replace_all(together$`Ever-treated (SD)`, " ", "")

together$`Never-treated (SD)`<-paste("(", together$`Never-treated (SD)`, ")", sep="")
together$`Never-treated (SD)`<-str_replace_all(together$`Never-treated (SD)`, " ", "")

#want SD rows to be below the means
#need to reorganize the dataframe
a<-subset(together, select=c(2,4,6))
colnames(a)<-c("All", "Ever-treated", "Never-treated")

b<-subset(together, select=c(1,3,5))
colnames(b)<-c("All", "Ever-treated", "Never-treated")

#put a and b together
c<-rbind(b,a)[rep(seq_len(nrow(b)),each=2)+c(0,nrow(b)),]

#add back in the normalized difference
d<-subset(together, select=c(7:9))

e<-subset(together, select=c(7:9))

#make it empty
e$ND_1_2<-NA
e$ND_1_3<-NA
e$ND_2_3<-NA

f<-rbind(d,e)[rep(seq_len(nrow(d)),each=2)+c(0,nrow(d)),]

#then use cbind
g<-cbind(c,f)

#take out columns 4 and 5
g<-subset(g, select=-c(4,5))
g <- g %>% mutate_if(is.numeric, format, nsmall=3)

#get rid of even numbered row names with SD
variables<-row.names(g)
row.names(g) <- NULL

g$variables<-variables

#move variables to the front
g <- g %>% relocate(variables, .before=All)

#take out the name of the var from the SD row (even numbered rows)
even_indexes<-seq(2,nrow(g),2)
g[even_indexes, "variables"] <- " "

#rename some of the variables 
g<- g %>% rename(" "= variables)
g<- g %>% rename("N. difference (2 vs. 3)"= ND_2_3)

#just need the observations and N in the last row to be added to the table
g[nrow(g) + 1,] = list("Observations", nrow(merge), nrow(treat), nrow(control), " ")
g[nrow(g) + 1,] = list("N HUC12", length(unique(merge1$huc12)), length(unique(treat1$huc12)), length(unique(control1$huc12)), " ")

#add column number headings
headers<-c(" ", "All", "Ever-treated", "Never-treated", "N. difference (2 vs. 3)")
g<-rbind(headers, g)

colnames(g)<-c("", "1", "2", "3", "4")


#add p value column
# Create a full-length vector with p-values in odd rows and blanks in even rows
p_column <- rep("", nrow(g) - 1)  # minus the header row
p_column[seq(1, length(p_values)*2, by = 2)] <- p_values

# Add "p-value" header to the top
p_column <- c("p-value", p_column)

# Append to g
g$p_value <- p_column


#make it a latex table
tab<-xtable(g, align=c("l", "l", "c","c","c","c", "c"), digits=3)
print.xtable(tab, include.rownames=FALSE, hline.after=c(-1, 1, nrow(tab), nrow(tab)-2))

#save it
print.xtable(tab, file="Results/summary_table.tex", include.rownames=FALSE, hline.after=c(-1, 1, nrow(tab), nrow(tab)-2))

